from pipeline example described by Pierre-Luc Germain, course sta426 UZH

Loading necessary libraries

# set local path
local.path <- getwd()
setwd(local.path)

# use source() function to source the functions we want to execute
source("functions/functions_qc.R")
source("functions/functions_analysis.R")

Preprocessing & Clustering

Loading the data

## data can be downloaded from : http://imlspenticton.uzh.ch/dump/files_for_levesque.tar

#pick your poison

#sces <- read_previous_data()
sces <- read_and_qc(6)
## [1] "###################################"
## [1] "Quality Control:"
## [1] "patient1_HS"
## [1] "Summary of discarded antibodies:"
##    Mode   FALSE    TRUE 
## logical   69647  667633 
## [1] "Distribution of the number of detected antibodies across all cells (red line = threshold, below which cells would be discarded):"

## [1] "Summaries of FDR smaller than 0.001 of the output of the empty Droplets algorithm:"
##    Mode   FALSE    TRUE    NA's 
## logical  357639    4241  375400 
##        Limited
## Sig      FALSE   TRUE
##   FALSE 357639      0
##   TRUE       2   4239
## [1] "Histogram showing the P-values of the output of the empty Droplets algorithm: "

## [1] "Summary of the cells identified as dead cells:"
##    Mode   FALSE    TRUE 
## logical    3643     598 
## [1] "Plot of cells, which will be discarded because they are identified as dead cells:"

## [1] "Summary of all cells that will be discarded and for what reasons:"
## DataFrame with 1 row and 4 columns
##     LibSize    NExprs  MitoProp     Total
##   <integer> <integer> <integer> <integer>
## 1        41        66       598       663
## [1] "Plots looking at some quality metrics calculated by perCellQCMetrics:"
## [1] "Check that we don't accidently get rid of small cell populations because of our cleaning of the data:"

## [1] "Summary of the doublets identified by the scDblFinder algorithm:"
## 
## singlet doublet 
##    3451     127 
## [1] "Percentage of removed cells:"
## [1] 99.53193
## [1] "###################################"
## [1] "Quality Control:"
## [1] "patient1_SCC"
## [1] "Summary of discarded antibodies:"
##    Mode   FALSE    TRUE 
## logical   68998  668282 
## [1] "Distribution of the number of detected antibodies across all cells (red line = threshold, below which cells would be discarded):"

## [1] "Summaries of FDR smaller than 0.001 of the output of the empty Droplets algorithm:"
##    Mode   FALSE    TRUE    NA's 
## logical  265491    2816  468973 
##        Limited
## Sig      FALSE   TRUE
##   FALSE 265491      0
##   TRUE       0   2816
## [1] "Histogram showing the P-values of the output of the empty Droplets algorithm: "

## [1] "Summary of the cells identified as dead cells:"
##    Mode   FALSE    TRUE 
## logical    2405     411 
## [1] "Plot of cells, which will be discarded because they are identified as dead cells:"

## [1] "Summary of all cells that will be discarded and for what reasons:"
## DataFrame with 1 row and 4 columns
##     LibSize    NExprs  MitoProp     Total
##   <integer> <integer> <integer> <integer>
## 1        24        40       411       448
## [1] "Plots looking at some quality metrics calculated by perCellQCMetrics:"
## [1] "Check that we don't accidently get rid of small cell populations because of our cleaning of the data:"

## [1] "Summary of the doublets identified by the scDblFinder algorithm:"
## 
## singlet doublet 
##    2310      58 
## [1] "Percentage of removed cells:"
## [1] 99.68669
## [1] "###################################"
## [1] "Quality Control:"
## [1] "patient2_HS"
## [1] "Summary of discarded antibodies:"
##    Mode   FALSE    TRUE 
## logical   70856  666424 
## [1] "Distribution of the number of detected antibodies across all cells (red line = threshold, below which cells would be discarded):"

## [1] "Summaries of FDR smaller than 0.001 of the output of the empty Droplets algorithm:"
##    Mode   FALSE    TRUE    NA's 
## logical  348153    9225  379902 
##        Limited
## Sig      FALSE   TRUE
##   FALSE 348153      0
##   TRUE      88   9137
## [1] "Histogram showing the P-values of the output of the empty Droplets algorithm: "

## [1] "Summary of the cells identified as dead cells:"
##    Mode   FALSE    TRUE 
## logical    8229     996 
## [1] "Plot of cells, which will be discarded because they are identified as dead cells:"

## [1] "Summary of all cells that will be discarded and for what reasons:"
## DataFrame with 1 row and 4 columns
##     LibSize    NExprs  MitoProp     Total
##   <integer> <integer> <integer> <integer>
## 1        11        21       996      1015
## [1] "Plots looking at some quality metrics calculated by perCellQCMetrics:"
## [1] "Check that we don't accidently get rid of small cell populations because of our cleaning of the data:"

## [1] "Summary of the doublets identified by the scDblFinder algorithm:"
## 
## singlet doublet 
##    7508     702 
## [1] "Percentage of removed cells:"
## [1] 98.98166
## [1] "###################################"
## [1] "Quality Control:"
## [1] "patient2_AK"
## [1] "Summary of discarded antibodies:"
##    Mode   FALSE    TRUE 
## logical   68400  668880 
## [1] "Distribution of the number of detected antibodies across all cells (red line = threshold, below which cells would be discarded):"

## [1] "Summaries of FDR smaller than 0.001 of the output of the empty Droplets algorithm:"
##    Mode   FALSE    TRUE    NA's 
## logical  334067    5231  397982 
##        Limited
## Sig      FALSE   TRUE
##   FALSE 334067      0
##   TRUE      25   5206
## [1] "Histogram showing the P-values of the output of the empty Droplets algorithm: "

## [1] "Summary of the cells identified as dead cells:"
##    Mode   FALSE    TRUE 
## logical    4480     751 
## [1] "Plot of cells, which will be discarded because they are identified as dead cells:"

## [1] "Summary of all cells that will be discarded and for what reasons:"
## DataFrame with 1 row and 4 columns
##     LibSize    NExprs  MitoProp     Total
##   <integer> <integer> <integer> <integer>
## 1       128       127       751       824
## [1] "Plots looking at some quality metrics calculated by perCellQCMetrics:"
## [1] "Check that we don't accidently get rid of small cell populations because of our cleaning of the data:"

## [1] "Summary of the doublets identified by the scDblFinder algorithm:"
## 
## singlet doublet 
##    4198     209 
## [1] "Percentage of removed cells:"
## [1] 99.43061
#sces <- read_10x()
sces <- lapply(sces, split_data)

# only select the gene expression data, drop the antibody capture
sce.patient1_HS <- sces[[1]]
sce.patient1_SCC <- sces[[2]]
sce.patient2_HS <- sces[[3]]
sce.patient2_AK <- sces[[4]]

# add name to sce for output graph
attr(sce.patient1_HS, "name") <- "Patient1 HS"
attr(sce.patient1_SCC, "name") <- "Patient1 SCC"
attr(sce.patient2_HS, "name") <- "Patient2 HS"
attr(sce.patient2_AK, "name") <- "Patient2 AK"

#convert rownames
# rownames(sce.patient1_HS) <- convert_rownames(sce.patient1_HS)
# rownames(sce.patient1_SCC) <- convert_rownames(sce.patient1_SCC)
# rownames(sce.patient2_HS) <- convert_rownames(sce.patient2_HS)
# rownames(sce.patient2_AK) <- convert_rownames(sce.patient2_AK)

#make sure every rowname has the format ENS_ID.Gene_Name
rownames(sce.patient1_HS) <- paste_rownames(sce.patient1_HS)
rownames(sce.patient1_SCC) <- paste_rownames(sce.patient1_SCC)
rownames(sce.patient2_HS) <- paste_rownames(sce.patient2_HS)
rownames(sce.patient2_AK) <- paste_rownames(sce.patient2_AK)

Normalization & reduction

The markers found in the literature Kim, Chung, and Kim (2020) Solé-Boldo (2020) were used to separate the different cell types and their subtypes within them. A few markers didn’t appear at all in our data (WISP2 [Secretory-reticular fibroblast], SEPP1 [Macrophage], TYP1 [Melanocyte]) so the corresponding cell types were selected using their remaining markers present. The Pericyte and vSMC markers were removed because they clustered badly and polluted the Fibroblast population. Since those markers are known to correlate with specific cell types, their genes were added to the list of high variable genes used to produce a lower dimensional representation of the data, even when their expression didn’t vary enough to be selected as highly variable.

Types Subtypes Markers
Keratinocyte DSC3 DSP LGALS7
basal KRT5 KRT14 COL17A1
suprabasal KRT1 KRT10
differentiated LOR SPINK5
ORS KRT6B
channel GJB2 GJB6 ATP1B1
sebaceous gland MGST1 FASN
sweat gland DCD AQP5
Fibroblast DCN LUM MMP2
secretory reticular SLPI CTHRC1 MFAP5 TSPAN8
proinflammatory CCL19 APOE CXCL2 CXCL3 EFEMP1
secretory papillary APCDD1 ID1 WIF1 COL18A1 PTGDS
mesenchymal ASPN POSTN GPC3 TNN SFRP1
Pericyte & vSMC ACTA2 TAGLN
pericyte RGS5
vSMC MYL9 TPM2 RERGL
Endothelial cell PECAM1 SELE CLDN5 VWF
lymphatic PROX1 LYVE1
Myeloid HLA-DRA FCER1G TYROBP AIF1
dendritic CD1C
langerhans CD207
macrophage CD68 RNASE1 ITGAX
Lymphocyte CD3D CD3E CD52 IL7R
Melanocyte DCT MLANA PMEL
Schwann cell CDH19 NGFR
Mitotic cell UBE2C PCNA
# run PCA

# using known genes
# genes not found in the data : WISP2" "SEPP1" "TYP1" 
# genes linked to pericytevSMC : "ACTA2","TAGLN"
# were removed from the following list
known_markers <- c("DSC3","DSP","LGALS7","KRT5","KRT14","COL17A1","KRT1","KRT10","LOR", "SPINK5","KRT6B","GJB2","GJB6","ATP1B1","MGST1","FASN","DCD","AQP5","DCN","LUM","MMP2","SLPI","CTHRC1","MFAP5","TSPAN8","CCL19","APOE","CXCL2","CXCL3","EFEMP1","APCDD1","ID1","WIF1","COL18A1","PTGDS","ASPN","POSTN","GPC3","TNN","SFRP1","RGS5","MYL9","TPM2","RERGL","PECAM1","SELE","CLDN5","VWF","PROX1","LYVE1","HLA-DRA","FCER1G","TYROBP","AIF1","CD1C","CD207","CD68","RNASE1","ITGAX","CD3D","CD3E","CD52","IL7R","DCT","MLANA","PMEL","CDH19","NGFR","UBE2C","PCNA")

sce.patient1_HS <- sc_PCA(sce.patient1_HS, hvg.patient1_HS, known_markers)
## Adding IL7R NGFR to the gene set used for PCA in Patient1 HS

sce.patient1_SCC <- sc_PCA(sce.patient1_SCC, hvg.patient1_SCC, known_markers)
## Adding KRT10 APOE TNN AIF1 CD3E NGFR to the gene set used for PCA in Patient1 SCC

sce.patient2_HS <- sc_PCA(sce.patient2_HS, hvg.patient2_HS, known_markers)
## Adding COL17A1 ID1 AIF1 ITGAX CD3E to the gene set used for PCA in Patient2 HS

sce.patient2_AK <- sc_PCA(sce.patient2_AK, hvg.patient2_AK, known_markers)
## Adding CXCL2 GPC3 TNN FCER1G ITGAX CD3E to the gene set used for PCA in Patient2 AK

Clustering

results <- sc_cluster(sce.patient1_HS)

sce.patient1_HS <- results[[1]]
g.patient1_HS <- results[[2]]

results <- sc_cluster(sce.patient1_SCC)

sce.patient1_SCC <- results[[1]]
g.patient1_SCC <- results[[2]]

results <- sc_cluster(sce.patient2_HS)

sce.patient2_HS <- results[[1]]
g.patient2_HS <- results[[2]]

results <- sc_cluster(sce.patient2_AK)

sce.patient2_AK <- results[[1]]
g.patient2_AK <- results[[2]]

Cluster annotation

Known markers

# genes not found in our data : "DCDN" "WISP2" "SEPP1" "TYP1" 
# were removed from the list below
genes <- list(
  # classification: https://www.sciencedirect.com/science/article/pii/S0923181120301985?via%3Dihub#bib0050
  # KERATINOCYTE
  keratinocyte = c("DSC3","DSP","LGALS7"),
  #keratinocyte_basal = c("KRT5","KRT14","COL17A1"),
  #keratinocyte_suprabasal = c("KRT1","KRT10"),
  #keratinocyte_differentiated = c("LOR", "SPINK5"),
  #keratinocyte_ORS = c("KRT6B"),
  #keratinocyte_channel = c("GJB2","GJB6","ATP1B1"),
  #keratinocyte_sebaceous_gland = c("MGST1","FASN"),
  #keratinocyte_sweat_gland  = c("DCD","AQP5"),
  # FIBROBLAST
  fibroblast  = c("DCN","LUM","MMP2"),
  # fibroblast subclassification  from: https://www.nature.com/articles/s42003-020-0922-4#
  #fibroblast_secretory_reticular = c(SLPI","CTHRC1","MFAP5","TSPAN8"),
  #fibroblast_proinflammatory = c("CCL19","APOE","CXCL2","CXCL3","EFEMP1"),
  #fibroblast_secretory_papillary = c("APCDD1","ID1","WIF1","COL18A1","PTGDS"),
  #fibroblast_mesenchymal = c("ASPN","POSTN","GPC3","TNN","SFRP1"),
  # PERICYTE & vSMC
  pericytevSMC  = c("ACTA2","TAGLN"), #vSMC : vascular smooth muscel cell
  #pericytevSMC_pericyte  = c("RGS5"),
  #pericytevSMC_vSMC  = c("MYL9","TPM2","RERGL"),
  # ENDOTHELIAL CELL
  endothelial  = c("PECAM1","SELE","CLDN5","VWF"),
  #endothelial_lymphatic  = c("PROX1","LYVE1"),
  # MYELOID CELL
  myeloid  = c("HLA-DRA","FCER1G","TYROBP","AIF1"),
  #myeloid_dendritic  = c("CD1C"),
  #myeloid_langerhans  = c("CD207"),
  #myeloid_macrophage = c("CD68","RNASE1","ITGAX"),
  # LYMPHOCYTE
  lymphocyte  = c("CD3D","CD3E","CD52","IL7R"),
  # MELANOCYTE
  melanocyte  = c("DCT","MLANA","PMEL"),
  # SCHWANN CELL
  schwann  = c("CDH19","NGFR"),
  # MITOTIC CELL
  mitotic  = c("UBE2C","PCNA")
)

# TODO: try to move the convert_rownames step higher in a block without output so we're less
# likely to run it multiple time (which would lead to weird output)
#rownames(sce.patient1_HS) <- convert_rownames(sce.patient1_HS, g.patient1_HS, genes)
results <- annotate_cells(sce.patient1_HS, g.patient1_HS, genes)
sce.patient1_HS <- results[[1]]
km.patient1_HS <- results[[2]]

#rownames(sce.patient1_SCC) <- convert_rownames(sce.patient1_SCC, g.patient1_SCC, genes)
results <- annotate_cells(sce.patient1_SCC, g.patient1_SCC, genes)
sce.patient1_SCC <- results[[1]]
km.patient1_SCC <- results[[2]]

#rownames(sce.patient2_HS) <- convert_rownames(sce.patient2_HS, g.patient2_HS, genes)
results <- annotate_cells(sce.patient2_HS, g.patient2_HS, genes)
sce.patient2_HS <- results[[1]]
km.patient2_HS <- results[[2]]

#rownames(sce.patient2_AK) <- convert_rownames(sce.patient2_AK, g.patient2_AK, genes)
results <- annotate_cells(sce.patient2_AK, g.patient2_AK, genes)
sce.patient2_AK <- results[[1]]
km.patient2_AK <- results[[2]]

Pseudo-bulk aggregation

results <- pseudobulk(sce.patient1_HS, km.patient1_HS)

sce.patient1_HS <- results[[1]]
pb.patient1_HS <- results[[2]]
mat.patient1_HS <- results[[3]]
cl2.patient1_HS <- results[[4]]

#Patient 1 SCC
results <- pseudobulk(sce.patient1_SCC, km.patient1_SCC)

sce.patient1_SCC <- results[[1]]
pb.patient1_SCC <- results[[2]]
mat.patient1_SCC <- results[[3]]
cl2.patient1_SCC <- results[[4]]

#Patient 2 HS
results <- pseudobulk(sce.patient2_HS, km.patient2_HS)

sce.patient2_HS <- results[[1]]
pb.patient2_HS <- results[[2]]
mat.patient2_HS <- results[[3]]
cl2.patient2_HS <- results[[4]]

#Patient 2 AK
results <- pseudobulk(sce.patient2_AK, km.patient2_AK)

sce.patient2_AK <- results[[1]]
pb.patient2_AK <- results[[2]]
mat.patient2_AK <- results[[3]]
cl2.patient2_AK <- results[[4]]

# create barplots of cell type proportions
df.celltypes.patient1 <- rbind(df_barplot_celltypes(sce.patient1_HS), df_barplot_celltypes(sce.patient1_SCC))
df.celltypes.patient1$Type <- c("HS", "HS", "HS", "HS", "HS", "HS", "HS", "HS", "SCC", "SCC", "SCC", "SCC", "SCC", "SCC", "SCC", "SCC")

df.celltypes.patient2 <- rbind(df_barplot_celltypes(sce.patient2_HS), df_barplot_celltypes(sce.patient2_AK))
df.celltypes.patient2$Type <- c("HS", "HS", "HS", "HS", "HS", "HS", "HS", "HS", "AK", "AK", "AK", "AK", "AK", "AK", "AK", "AK")
  
dynamic_barplot(df.celltypes.patient1, "Patient 1", T, "Proportion of cell types in")

dynamic_barplot(df.celltypes.patient2, "Patient 2", T, "Proportion of cell types in")

Sub population

known_markers.kera <- c("KRT5","KRT14","COL17A1","KRT1","KRT10","LOR", "SPINK5","KRT6B","GJB2","GJB6","ATP1B1","MGST1","FASN","DCD","AQP5")

#annotate the subclusters
  genes.kera <- list(
  # classification: https://www.sciencedirect.com/science/article/pii/S0923181120301985?via%3Dihub#bib0050
    # KERATINOCYTE
    #keratinocyte = c("DSC3","DSP","LGALS7"),
    basal = c("KRT5","KRT14","COL17A1"),
    suprabasal = c("KRT1","KRT10"),
    differentiated = c("LOR", "SPINK5"),
    ORS = c("KRT6B"),
    channel = c("GJB2","GJB6","ATP1B1"),
    sebaceous_gland = c("MGST1","FASN"),
    sweat_gland  = c("DCD","AQP5")
    )
  
  
# TODO : find a way to avoid giving both known_merkers.kera and genes.kera since both contain the 
  # same information in essence. Maybe will be fixed by the first todo
  # yeah but then we are running an additional for loop/maybe we want different ones (hypothetically at least)
sce.patient1_HS.kera <- cluster_subtype(sce.patient1_HS, "keratinocyte", known_markers.kera, genes.kera)
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |==================                                                    |  25%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |====================================================                  |  75%
  |                                                                            
  |======================================================================| 100%
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |==                                                                    |   3%
  |                                                                            
  |====                                                                  |   6%
  |                                                                            
  |======                                                                |   9%
  |                                                                            
  |========                                                              |  11%
  |                                                                            
  |==========                                                            |  14%
  |                                                                            
  |============                                                          |  17%
  |                                                                            
  |==============                                                        |  20%
  |                                                                            
  |================                                                      |  23%
  |                                                                            
  |==================                                                    |  26%
  |                                                                            
  |====================                                                  |  29%
  |                                                                            
  |======================                                                |  31%
  |                                                                            
  |========================                                              |  34%
  |                                                                            
  |==========================                                            |  37%
  |                                                                            
  |============================                                          |  40%
  |                                                                            
  |==============================                                        |  43%
  |                                                                            
  |================================                                      |  46%
  |                                                                            
  |==================================                                    |  49%
  |                                                                            
  |====================================                                  |  51%
  |                                                                            
  |======================================                                |  54%
  |                                                                            
  |========================================                              |  57%
  |                                                                            
  |==========================================                            |  60%
  |                                                                            
  |============================================                          |  63%
  |                                                                            
  |==============================================                        |  66%
  |                                                                            
  |================================================                      |  69%
  |                                                                            
  |==================================================                    |  71%
  |                                                                            
  |====================================================                  |  74%
  |                                                                            
  |======================================================                |  77%
  |                                                                            
  |========================================================              |  80%
  |                                                                            
  |==========================================================            |  83%
  |                                                                            
  |============================================================          |  86%
  |                                                                            
  |==============================================================        |  89%
  |                                                                            
  |================================================================      |  91%
  |                                                                            
  |==================================================================    |  94%
  |                                                                            
  |====================================================================  |  97%
  |                                                                            
  |======================================================================| 100%
## Adding KRT5 AQP5 to the gene set used for PCA in Patient1 HS

sce.patient1_SCC.kera <- cluster_subtype(sce.patient1_SCC, "keratinocyte", known_markers.kera, genes.kera)
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |==================                                                    |  25%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |====================================================                  |  75%
  |                                                                            
  |======================================================================| 100%
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |==                                                                    |   3%
  |                                                                            
  |====                                                                  |   6%
  |                                                                            
  |=======                                                               |   9%
  |                                                                            
  |=========                                                             |  12%
  |                                                                            
  |===========                                                           |  16%
  |                                                                            
  |=============                                                         |  19%
  |                                                                            
  |===============                                                       |  22%
  |                                                                            
  |==================                                                    |  25%
  |                                                                            
  |====================                                                  |  28%
  |                                                                            
  |======================                                                |  31%
  |                                                                            
  |========================                                              |  34%
  |                                                                            
  |==========================                                            |  38%
  |                                                                            
  |============================                                          |  41%
  |                                                                            
  |===============================                                       |  44%
  |                                                                            
  |=================================                                     |  47%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |=====================================                                 |  53%
  |                                                                            
  |=======================================                               |  56%
  |                                                                            
  |==========================================                            |  59%
  |                                                                            
  |============================================                          |  62%
  |                                                                            
  |==============================================                        |  66%
  |                                                                            
  |================================================                      |  69%
  |                                                                            
  |==================================================                    |  72%
  |                                                                            
  |====================================================                  |  75%
  |                                                                            
  |=======================================================               |  78%
  |                                                                            
  |=========================================================             |  81%
  |                                                                            
  |===========================================================           |  84%
  |                                                                            
  |=============================================================         |  88%
  |                                                                            
  |===============================================================       |  91%
  |                                                                            
  |==================================================================    |  94%
  |                                                                            
  |====================================================================  |  97%
  |                                                                            
  |======================================================================| 100%
## Adding FASN to the gene set used for PCA in Patient1 SCC

sce.patient2_HS.kera <- cluster_subtype(sce.patient2_HS, "keratinocyte", known_markers.kera, genes.kera)
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |==================                                                    |  25%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |====================================================                  |  75%
  |                                                                            
  |======================================================================| 100%
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |==                                                                    |   3%
  |                                                                            
  |=====                                                                 |   6%
  |                                                                            
  |=======                                                               |  10%
  |                                                                            
  |=========                                                             |  13%
  |                                                                            
  |===========                                                           |  16%
  |                                                                            
  |==============                                                        |  19%
  |                                                                            
  |================                                                      |  23%
  |                                                                            
  |==================                                                    |  26%
  |                                                                            
  |====================                                                  |  29%
  |                                                                            
  |=======================                                               |  32%
  |                                                                            
  |=========================                                             |  35%
  |                                                                            
  |===========================                                           |  39%
  |                                                                            
  |=============================                                         |  42%
  |                                                                            
  |================================                                      |  45%
  |                                                                            
  |==================================                                    |  48%
  |                                                                            
  |====================================                                  |  52%
  |                                                                            
  |======================================                                |  55%
  |                                                                            
  |=========================================                             |  58%
  |                                                                            
  |===========================================                           |  61%
  |                                                                            
  |=============================================                         |  65%
  |                                                                            
  |===============================================                       |  68%
  |                                                                            
  |==================================================                    |  71%
  |                                                                            
  |====================================================                  |  74%
  |                                                                            
  |======================================================                |  77%
  |                                                                            
  |========================================================              |  81%
  |                                                                            
  |===========================================================           |  84%
  |                                                                            
  |=============================================================         |  87%
  |                                                                            
  |===============================================================       |  90%
  |                                                                            
  |=================================================================     |  94%
  |                                                                            
  |====================================================================  |  97%
  |                                                                            
  |======================================================================| 100%
## Adding KRT5 FASN to the gene set used for PCA in Patient2 HS

sce.patient2_AK.kera <- cluster_subtype(sce.patient2_AK, "keratinocyte", known_markers.kera, genes.kera)
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |==================                                                    |  25%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |====================================================                  |  75%
  |                                                                            
  |======================================================================| 100%
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |==                                                                    |   3%
  |                                                                            
  |====                                                                  |   6%
  |                                                                            
  |=======                                                               |   9%
  |                                                                            
  |=========                                                             |  12%
  |                                                                            
  |===========                                                           |  16%
  |                                                                            
  |=============                                                         |  19%
  |                                                                            
  |===============                                                       |  22%
  |                                                                            
  |==================                                                    |  25%
  |                                                                            
  |====================                                                  |  28%
  |                                                                            
  |======================                                                |  31%
  |                                                                            
  |========================                                              |  34%
  |                                                                            
  |==========================                                            |  38%
  |                                                                            
  |============================                                          |  41%
  |                                                                            
  |===============================                                       |  44%
  |                                                                            
  |=================================                                     |  47%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |=====================================                                 |  53%
  |                                                                            
  |=======================================                               |  56%
  |                                                                            
  |==========================================                            |  59%
  |                                                                            
  |============================================                          |  62%
  |                                                                            
  |==============================================                        |  66%
  |                                                                            
  |================================================                      |  69%
  |                                                                            
  |==================================================                    |  72%
  |                                                                            
  |====================================================                  |  75%
  |                                                                            
  |=======================================================               |  78%
  |                                                                            
  |=========================================================             |  81%
  |                                                                            
  |===========================================================           |  84%
  |                                                                            
  |=============================================================         |  88%
  |                                                                            
  |===============================================================       |  91%
  |                                                                            
  |==================================================================    |  94%
  |                                                                            
  |====================================================================  |  97%
  |                                                                            
  |======================================================================| 100%
## Adding KRT5 to the gene set used for PCA in Patient2 AK

genes.dyn <- list(
  # classification: https://www.sciencedirect.com/science/article/pii/S0092867420306723
  keratinocyte_basal = "COL17A1",
  keratinocyte_cycling = "MKI67",
  keratinocyte_differentiating = "KRT1"
  )

kera.dyn.patient1_HS <- dynamic_plot(sce.patient1_HS.kera, g.patient1_HS.kera, genes.dyn)
## class: SingleCellExperiment 
## dim: 17092 1374 
## metadata(2): Samples scDblFinder.stats
## assays(2): counts logcounts
## rownames(17092): ENSG00000241860.AL627309.5 ENSG00000237491.LINC01409
##   ... ENSG00000278817.AC007325.4 ENSG00000277196.AC007325.2
## rowData names(4): ID Symbol Type scDblFinder.selected
## colnames(1374): AAACCTGAGGGCTCTC-1 AAACCTGCAATGGAAT-1 ...
##   TTTGTCACAATCGGTT-1 TTTGTCAGTTCACCTC-1
## colData names(15): Sample Barcode ... cluster2 cluster3
## reducedDimNames(3): PCA TSNE UMAP
## altExpNames(1): Antibody Capture
kera.dyn.patient1_SCC <- dynamic_plot(sce.patient1_SCC.kera, g.patient1_SCC.kera, genes.dyn)
## class: SingleCellExperiment 
## dim: 15583 482 
## metadata(2): Samples scDblFinder.stats
## assays(2): counts logcounts
## rownames(15583): ENSG00000237491.LINC01409 ENSG00000228794.LINC01128
##   ... ENSG00000277666.AC136352.3 ENSG00000273554.AC136616.1
## rowData names(4): ID Symbol Type scDblFinder.selected
## colnames(482): AAACGGGGTCCGTGAC-1 AAACGGGGTCGGCACT-1 ...
##   TTTGCGCGTCGGCATC-1 TTTGGTTAGTCATGCT-1
## colData names(15): Sample Barcode ... cluster2 cluster3
## reducedDimNames(3): PCA TSNE UMAP
## altExpNames(1): Antibody Capture
kera.dyn.patient2_HS <- dynamic_plot(sce.patient2_HS.kera, g.patient2_HS.kera, genes.dyn)
## class: SingleCellExperiment 
## dim: 15229 1593 
## metadata(2): Samples scDblFinder.stats
## assays(2): counts logcounts
## rownames(15229): ENSG00000237491.LINC01409 ENSG00000228794.LINC01128
##   ... ENSG00000271254.AC240274.1 ENSG00000277196.AC007325.2
## rowData names(4): ID Symbol Type scDblFinder.selected
## colnames(1593): AAACCTGAGACATAAC-1 AAACGGGAGGAGCGAG-1 ...
##   TTTGTCAGTACGCACC-1 TTTGTCATCAGTCAGT-1
## colData names(15): Sample Barcode ... cluster2 cluster3
## reducedDimNames(3): PCA TSNE UMAP
## altExpNames(1): Antibody Capture
kera.dyn.patient2_AK <- dynamic_plot(sce.patient2_AK.kera, g.patient2_AK.kera, genes.dyn)
## class: SingleCellExperiment 
## dim: 15578 450 
## metadata(2): Samples scDblFinder.stats
## assays(2): counts logcounts
## rownames(15578): ENSG00000237491.LINC01409 ENSG00000228794.LINC01128
##   ... ENSG00000273554.AC136616.1 ENSG00000277196.AC007325.2
## rowData names(4): ID Symbol Type scDblFinder.selected
## colnames(450): AAAGTAGCACCGCTAG-1 AAATGCCTCGGAATCT-1 ...
##   TTTGGTTCATGAACCT-1 TTTGGTTGTGGTAACG-1
## colData names(15): Sample Barcode ... cluster2 cluster3
## reducedDimNames(3): PCA TSNE UMAP
## altExpNames(1): Antibody Capture
kera.dyn.patient1 <- rbind(kera.dyn.patient1_HS, kera.dyn.patient1_SCC)
kera.dyn.patient1$Type <- c("HS", "HS", "HS", "SCC", "SCC", "SCC")

kera.dyn.patient2 <- rbind(kera.dyn.patient2_HS, kera.dyn.patient2_AK)
kera.dyn.patient2$Type <- c("HS", "HS", "HS", "AK", "AK", "AK")


dynamic_barplot(kera.dyn.patient1, "Patient 1", F, "Proportion of Keratinocyte states in")

dynamic_barplot(kera.dyn.patient2, "Patient 2", F, "Proportion of Keratinocyte states in")

known_markers.fibro <- c("SLPI","CTHRC1","MFAP5","TSPAN8","CCL19","APOE","CXCL2","CXCL3","EFEMP1","APCDD1","ID1","WIF1","COL18A1","PTGDS","ASPN","POSTN","GPC3","TNN","SFRP1")

genes.fibro <- list(
   #fibroblast  = c("DCN","LUM","MMP2"),
   #fibroblast subclassification  from: https://www.nature.com/articles/s42003-020-0922-4#
   fibroblast_secretory_reticular = c("SLPI","CTHRC1","MFAP5","TSPAN8"),
   fibroblast_proinflammatory = c("CCL19","APOE","CXCL2","CXCL3","EFEMP1"),
   fibroblast_secretory_papillary = c("APCDD1","ID1","WIF1","COL18A1","PTGDS"),
   fibroblast_mesenchymal = c("ASPN","POSTN","GPC3","TNN","SFRP1")
)

sce.patient1_HS.fibro <- cluster_subtype(sce.patient1_HS, "fibroblast", known_markers.fibro, genes.fibro)
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |==================                                                    |  25%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |====================================================                  |  75%
  |                                                                            
  |======================================================================| 100%
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |===                                                                   |   4%
  |                                                                            
  |======                                                                |   9%
  |                                                                            
  |=========                                                             |  13%
  |                                                                            
  |============                                                          |  17%
  |                                                                            
  |===============                                                       |  22%
  |                                                                            
  |==================                                                    |  26%
  |                                                                            
  |=====================                                                 |  30%
  |                                                                            
  |========================                                              |  35%
  |                                                                            
  |===========================                                           |  39%
  |                                                                            
  |==============================                                        |  43%
  |                                                                            
  |=================================                                     |  48%
  |                                                                            
  |=====================================                                 |  52%
  |                                                                            
  |========================================                              |  57%
  |                                                                            
  |===========================================                           |  61%
  |                                                                            
  |==============================================                        |  65%
  |                                                                            
  |=================================================                     |  70%
  |                                                                            
  |====================================================                  |  74%
  |                                                                            
  |=======================================================               |  78%
  |                                                                            
  |==========================================================            |  83%
  |                                                                            
  |=============================================================         |  87%
  |                                                                            
  |================================================================      |  91%
  |                                                                            
  |===================================================================   |  96%
  |                                                                            
  |======================================================================| 100%
## Adding WIF1 to the gene set used for PCA in Patient1 HS

sce.patient1_SCC.fibro <- cluster_subtype(sce.patient1_SCC, "fibroblast", known_markers.fibro, genes.fibro)
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |==================                                                    |  25%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |====================================================                  |  75%
  |                                                                            
  |======================================================================| 100%
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |===                                                                   |   4%
  |                                                                            
  |=====                                                                 |   7%
  |                                                                            
  |========                                                              |  11%
  |                                                                            
  |==========                                                            |  15%
  |                                                                            
  |=============                                                         |  19%
  |                                                                            
  |================                                                      |  22%
  |                                                                            
  |==================                                                    |  26%
  |                                                                            
  |=====================                                                 |  30%
  |                                                                            
  |=======================                                               |  33%
  |                                                                            
  |==========================                                            |  37%
  |                                                                            
  |=============================                                         |  41%
  |                                                                            
  |===============================                                       |  44%
  |                                                                            
  |==================================                                    |  48%
  |                                                                            
  |====================================                                  |  52%
  |                                                                            
  |=======================================                               |  56%
  |                                                                            
  |=========================================                             |  59%
  |                                                                            
  |============================================                          |  63%
  |                                                                            
  |===============================================                       |  67%
  |                                                                            
  |=================================================                     |  70%
  |                                                                            
  |====================================================                  |  74%
  |                                                                            
  |======================================================                |  78%
  |                                                                            
  |=========================================================             |  81%
  |                                                                            
  |============================================================          |  85%
  |                                                                            
  |==============================================================        |  89%
  |                                                                            
  |=================================================================     |  93%
  |                                                                            
  |===================================================================   |  96%
  |                                                                            
  |======================================================================| 100%

sce.patient2_HS.fibro <- cluster_subtype(sce.patient2_HS, "fibroblast", known_markers.fibro, genes.fibro)
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |==================                                                    |  25%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |====================================================                  |  75%
  |                                                                            
  |======================================================================| 100%
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |===                                                                   |   4%
  |                                                                            
  |======                                                                |   8%
  |                                                                            
  |=========                                                             |  12%
  |                                                                            
  |============                                                          |  17%
  |                                                                            
  |===============                                                       |  21%
  |                                                                            
  |==================                                                    |  25%
  |                                                                            
  |====================                                                  |  29%
  |                                                                            
  |=======================                                               |  33%
  |                                                                            
  |==========================                                            |  38%
  |                                                                            
  |=============================                                         |  42%
  |                                                                            
  |================================                                      |  46%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |======================================                                |  54%
  |                                                                            
  |=========================================                             |  58%
  |                                                                            
  |============================================                          |  62%
  |                                                                            
  |===============================================                       |  67%
  |                                                                            
  |==================================================                    |  71%
  |                                                                            
  |====================================================                  |  75%
  |                                                                            
  |=======================================================               |  79%
  |                                                                            
  |==========================================================            |  83%
  |                                                                            
  |=============================================================         |  88%
  |                                                                            
  |================================================================      |  92%
  |                                                                            
  |===================================================================   |  96%
  |                                                                            
  |======================================================================| 100%

sce.patient2_AK.fibro <- cluster_subtype(sce.patient2_AK, "fibroblast", known_markers.fibro, genes.fibro)
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |==================                                                    |  25%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |====================================================                  |  75%
  |                                                                            
  |======================================================================| 100%
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |====                                                                  |   5%
  |                                                                            
  |=======                                                               |  10%
  |                                                                            
  |==========                                                            |  15%
  |                                                                            
  |==============                                                        |  20%
  |                                                                            
  |==================                                                    |  25%
  |                                                                            
  |=====================                                                 |  30%
  |                                                                            
  |========================                                              |  35%
  |                                                                            
  |============================                                          |  40%
  |                                                                            
  |================================                                      |  45%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |======================================                                |  55%
  |                                                                            
  |==========================================                            |  60%
  |                                                                            
  |==============================================                        |  65%
  |                                                                            
  |=================================================                     |  70%
  |                                                                            
  |====================================================                  |  75%
  |                                                                            
  |========================================================              |  80%
  |                                                                            
  |============================================================          |  85%
  |                                                                            
  |===============================================================       |  90%
  |                                                                            
  |==================================================================    |  95%
  |                                                                            
  |======================================================================| 100%
## Adding CCL19 to the gene set used for PCA in Patient2 AK

The following code was initially used to check if the genes linked to the markers described in the literature were all found in the data set

data_markers <- rowData(sce.patient1_HS)$Symbol
known_markers <- c("DSC3","DSP","LGALS7","KRT5","KRT14","COL17A1","KRT1","KRT10","LOR", "SPINK5","KRT6B","GJB2","GJB6","ATP1B1","MGST1","FASN","DCD","AQP5","DCN","LUM","MMP2","WISP2","SLPI","CTHRC1","MFAP5","TSPAN8","CCL19","APOE","CXCL2","CXCL3","EFEMP1","APCDD1","ID1","WIF1","COL18A1","PTGDS","ASPN","POSTN","GPC3","TNN","SFRP1","ACTA2","TAGLN","RGS5","MYL9","TPM2","RERGL","PECAM1","SELE","CLDN5","VWF","PROX1","LYVE1","HLA-DRA","FCER1G","TYROBP","AIF1","CD1C","CD207","CD68","RNASE1","SEPP1","ITGAX","CD3D","CD3E","CD52","IL7R","DCT","MLANA","PMEL","TYP1","CDH19","NGFR","UBE2C","PCNA")


idx_notfound <- which(!(known_markers %in% rowData(sce.patient1_HS)$Symbol))

print(paste("Done:", length(known_markers)-length(idx_notfound), "/", length(known_markers), "markers found"))
cat("Genes missing in dataset: ", known_markers[idx_notfound])

Kim, Doyoung, Kyung Bae Chung, and Tae-Gyun Kim. 2020. “Application of Single-Cell Rna Sequencing on Human Skin: Technical Evolution and Challenges.” Journal of Dermatological Science 99 (2): 74–81. https://doi.org/https://doi.org/10.1016/j.jdermsci.2020.06.002.

Solé-Boldo, Raddatz, L. 2020. “Single-Cell Transcriptomes of the Human Skin Reveal Age-Related Loss of Fibroblast Priming.” Communications Biology 3 (188). https://doi.org/https://doi.org/10.1038/s42003-020-0922-4.